/*
 * This file is part of the URI Template library.
 *
 * For licensing information please see the file license.txt included in the release.
 * A copy of this licence can also be found at
 *   http://www.opensource.org/licenses/artistic-license-2.0.php
 */
package org.weborganic.furi;

import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.text.Normalizer;
import java.text.Normalizer.Form;

/**
 * An encoder/decoder for use by URI templates.
 *
 * Only unreserved characters according to RFC 3986 do not need to be encoded
 * within a variable:
 *
 * <pre>
 * unreserved = ALPHA / DIGIT / '-' / '.' / '_' / '~';
 * </pre>
 *
 * <p>
 * This encoder/decoder should be designed so that URI which contain only
 * unreserved characters are processed faster.
 *
 * @see <a href="http://tools.ietf.org/html/rfc3986">RFC 3986 - Uniform Resource
 *      Identifier (URI): Generic Syntax</a>
 * @see <a href="http://tools.ietf.org/html/rfc3986#appendix-A">RFC 3986 -
 *      Uniform Resource Identifier (URI): Generic Syntax - Appendix A.
 *      Collected ABNF for URI</a>
 * @see <a href=
 *      "http://www.unicode.org/unicode/reports/tr15/tr15-23.html#Specification">UAX
 *      #15: Unicode Normalization</a>
 *
 * @author Christophe Lauret
 * @version 11 June 2009
 */
public class URICoder {

	/**
	 * The UTF-8 character set, looked up once and reused.
	 */
	private static final Charset UTF8 = Charset.forName("UTF-8");

	/**
	 * The (upper case) hexadecimal digits used when writing escape sequences.
	 */
	private static final char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D',
			'E', 'F' };

	/**
	 * Prevents creation of instances.
	 */
	private URICoder() {
	}

	// Encoder
	// ==========================================================================

	/**
	 * Encodes the string as a valid URI fragment.
	 * 
	 * <p>
	 * Every character except the <em>unreserved</em> ones (ASCII letters,
	 * digits, '-', '.', '_' and '~') is percent-encoded.
	 * 
	 * @param s
	 *            The string to encode.
	 * 
	 * @return The corresponding encoded string.
	 */
	public static String encode(String s) {
		// '0' is already unreserved, so passing it as the exempt character
		// does not exempt anything extra
		return encode(s, '0');
	}

	/**
	 * Encodes the string as a valid URI fragment.
	 * 
	 * <p>
	 * Every character except the <em>unreserved</em> ones and the specified
	 * character is percent-encoded. Strings containing non-ASCII characters
	 * are normalized to NFKC and percent-encoded from their UTF-8 bytes.
	 * 
	 * @param s
	 *            The string to encode.
	 * @param c
	 *            An ASCII character that should not be encoded if found in the
	 *            string; a non-ASCII value exempts nothing.
	 * 
	 * @return The corresponding encoded string; the same instance if the
	 *         string is empty.
	 */
	public static String encode(String s, char c) {
		if (s.length() == 0) {
			return s;
		}
		// Pure ASCII input does not need normalization or UTF-8 conversion
		return isASCII(s) ? encodeASCII(s, c) : encodeUTF8(s, c);
	}

	/**
	 * Encodes the string as a valid URI fragment, escaping as little as
	 * possible.
	 * 
	 * <p>
	 * Only the characters rejected by {@link #isLegal(int)} are
	 * percent-encoded; this includes '%' itself, so an already encoded string
	 * gets encoded a second time. Strings containing non-ASCII characters are
	 * normalized to NFKC and percent-encoded from their UTF-8 bytes.
	 * 
	 * @param s
	 *            The string to encode.
	 * 
	 * @return The corresponding encoded string; the same instance if the
	 *         string is empty.
	 */
	public static String minimalEncode(String s) {
		if (s.length() == 0) {
			return s;
		}
		// Pure ASCII input does not need normalization or UTF-8 conversion
		return isASCII(s) ? minimalEncodeASCII(s) : minimalEncodeUTF8(s);
	}

	/**
	 * Encodes a string containing only ASCII characters, keeping unreserved
	 * characters and the exempt character as they are.
	 * 
	 * @param s
	 *            The string to encode (assuming ASCII characters only)
	 * @param e
	 *            A character that does not require encoding if found in the
	 *            string.
	 * 
	 * @return The encoded string.
	 */
	private static String encodeASCII(String s, char e) {
		StringBuilder sb = new StringBuilder(s.length());

		for (int i = 0; i < s.length(); i++) {
			char c = s.charAt(i);

			if (isUnreserved(c) || c == e) {
				sb.append(c);
			} else {
				appendEscape(sb, c);
			}
		}
		return sb.toString();
	}

	/**
	 * Encodes a string containing only ASCII characters, keeping every legal
	 * character as it is.
	 * 
	 * @param s
	 *            The string to encode (assuming ASCII characters only)
	 * 
	 * @return The encoded string.
	 */
	private static String minimalEncodeASCII(String s) {
		StringBuilder sb = new StringBuilder(s.length());

		for (int i = 0; i < s.length(); i++) {
			char c = s.charAt(i);

			if (isLegal(c)) {
				sb.append(c);
			} else {
				appendEscape(sb, c);
			}
		}
		return sb.toString();
	}

	/**
	 * Encodes a string containing non-ASCII characters from its UTF-8 bytes,
	 * keeping unreserved characters and the exempt character as they are.
	 * 
	 * @param s
	 *            The string to encode.
	 * @param e
	 *            A character that does not require encoding if found in the
	 *            string.
	 * 
	 * @return The encoded string.
	 */
	private static String encodeUTF8(String s, char e) {
		ByteBuffer bb = toUTF8(s);
		StringBuilder sb = new StringBuilder(bb.remaining());
		// The loop compares bytes, not characters: a non-ASCII exemption
		// would match a byte in the middle of a multi-byte sequence and
		// corrupt the output, so only an ASCII exemption is honoured.
		int exempt = (e < 0x80) ? e : -1;

		while (bb.hasRemaining()) {
			int b = bb.get() & 0xff;

			if (isUnreserved(b) || b == exempt) {
				sb.append((char) b);
			} else {
				appendEscape(sb, b);
			}
		}
		return sb.toString();
	}

	/**
	 * Encodes a string containing non-ASCII characters from its UTF-8 bytes,
	 * keeping every legal character as it is.
	 * 
	 * @param s
	 *            The string to encode.
	 * 
	 * @return The encoded string.
	 */
	private static String minimalEncodeUTF8(String s) {
		ByteBuffer bb = toUTF8(s);
		StringBuilder sb = new StringBuilder(bb.remaining());

		while (bb.hasRemaining()) {
			int b = bb.get() & 0xff;

			// bytes >= 0x80 are never legal, so they are always escaped
			if (isLegal(b)) {
				sb.append((char) b);
			} else {
				appendEscape(sb, b);
			}
		}
		return sb.toString();
	}

	/**
	 * Normalizes the string to NFKC (if it is not already) and converts it to
	 * UTF-8.
	 * 
	 * @param s
	 *            The string to convert.
	 * 
	 * @return A buffer holding the UTF-8 bytes of the normalized string.
	 */
	private static ByteBuffer toUTF8(String s) {
		// TODO: Normalizer requires Java 6!
		String n = Normalizer.isNormalized(s, Form.NFKC) ? s : Normalizer.normalize(s, Form.NFKC);

		return UTF8.encode(n);
	}

	// Decoder
	// ==========================================================================

	/**
	 * Decodes the string as a valid URI fragment.
	 * 
	 * <p>
	 * Percent-escapes are replaced by the character they represent (the
	 * escaped bytes being interpreted as UTF-8) and '+' is replaced by a
	 * space. A '%' which is not followed by at least two characters is
	 * dropped and the characters after it are kept as they are.
	 * 
	 * @param s
	 *            The string to decode.
	 * 
	 * @return The corresponding decoded string; the same instance if there is
	 *         nothing to decode.
	 * 
	 * @throws NumberFormatException
	 *             If a '%' is followed by two characters which are not both
	 *             ASCII hexadecimal digits.
	 */
	public static String decode(String s) {
		if (s.length() == 0 || (s.indexOf('%') < 0 && s.indexOf('+') < 0)) {
			return s;
		}
		// Escapes below %80 map directly to characters; anything else has to
		// go through the UTF-8 decoder
		return isEncodedASCII(s) ? decodeASCII(s) : decodeUTF8(s);
	}

	/**
	 * Decodes a string in which every escape sequence stands for an ASCII
	 * character.
	 * 
	 * @param s
	 *            The string to decode.
	 * 
	 * @return The decoded string.
	 */
	private static String decodeASCII(String s) {
		int length = s.length();
		StringBuilder sb = new StringBuilder(length);

		for (int i = 0; i < length; i++) {
			char c = s.charAt(i);

			if (c == '%') {
				if (i + 2 < length) {
					sb.append((char) unescape(s, i));
					i += 2;
				}
				// otherwise the escape is truncated: the '%' is dropped and
				// the remaining characters are handled by the next iterations
			} else if (c == '+') {
				sb.append(' ');
			} else {
				sb.append(c);
			}
		}
		return sb.toString();
	}

	/**
	 * Decodes a string containing escape sequences for non-ASCII characters
	 * using an UTF-8 decoder.
	 * 
	 * <p>
	 * Literal non-ASCII characters found in the string are preserved.
	 * 
	 * @param s
	 *            The string to decode.
	 * 
	 * @return The decoded string.
	 */
	private static String decodeUTF8(String s) {
		int length = s.length();
		// An escape or an ASCII character yields at most one byte per input
		// character; a literal non-ASCII character needs up to three.
		ByteBuffer bb = ByteBuffer.allocate(isASCII(s) ? length : 3 * length);
		int i = 0;

		while (i < length) {
			char c = s.charAt(i);

			if (c == '%') {
				if (i + 2 < length) {
					bb.put((byte) unescape(s, i));
					i += 3;
				} else {
					// truncated escape: drop the '%' only
					i++;
				}
			} else if (c == '+') {
				bb.put((byte) ' ');
				i++;
			} else if (c < 0x80) {
				bb.put((byte) c);
				i++;
			} else {
				// Literal non-ASCII characters should have been encoded, but
				// narrowing them to a byte would corrupt the UTF-8 stream:
				// encode the whole run instead (a run keeps surrogate pairs
				// together).
				int end = i + 1;

				while (end < length && s.charAt(end) >= 0x80) {
					end++;
				}
				bb.put(UTF8.encode(s.substring(i, end)));
				i = end;
			}
		}
		bb.flip();
		return UTF8.decode(bb).toString();
	}

	/**
	 * Returns the value of the escape sequence starting at the specified
	 * index.
	 * 
	 * <p>
	 * Only ASCII hexadecimal digits are accepted, signs and other Unicode
	 * digits are rejected.
	 * 
	 * @param s
	 *            The string containing the escape sequence.
	 * @param i
	 *            The index of the '%'; it must be followed by at least two
	 *            characters.
	 * 
	 * @return The value of the escaped byte, between 0 and 255.
	 * 
	 * @throws NumberFormatException
	 *             If either of the two characters is not a hexadecimal digit.
	 */
	private static int unescape(String s, int i) {
		int hi = hexValue(s.charAt(i + 1));
		int lo = hexValue(s.charAt(i + 2));

		if (hi < 0 || lo < 0) {
			throw new NumberFormatException(
					"Invalid escape sequence \"" + s.substring(i, i + 3) + "\" at index " + i);
		}
		return (hi << 4) | lo;
	}

	/**
	 * Returns the value of the specified hexadecimal digit.
	 * 
	 * @param c
	 *            The character to convert.
	 * 
	 * @return The value between 0 and 15, or -1 if the character is not an
	 *         ASCII hexadecimal digit.
	 */
	private static int hexValue(char c) {
		if (c >= '0' && c <= '9') {
			return c - '0';
		}
		if (c >= 'A' && c <= 'F') {
			return c - 'A' + 10;
		}
		if (c >= 'a' && c <= 'f') {
			return c - 'a' + 10;
		}
		return -1;
	}

	/**
	 * Appends the escape sequence for the given byte to the specified string
	 * builder.
	 * 
	 * @param sb
	 *            The string builder.
	 * @param b
	 *            The byte to escape; only the lowest 8 bits are used.
	 */
	private static void appendEscape(StringBuilder sb, int b) {
		sb.append('%');
		sb.append(HEX_DIGITS[(b >> 4) & 0x0f]);
		sb.append(HEX_DIGITS[b & 0x0f]);
	}

	/**
	 * Indicates whether the character is unreserved or not.
	 * 
	 * @param c
	 *            The character to test.
	 * 
	 * @return <code>true</code> if it is an ASCII letter, a digit, '.', '_',
	 *         '-' or '~'; <code>false</code> otherwise.
	 */
	private static boolean isUnreserved(int c) {
		// ALPHA (lower)
		if (c >= 'a' && c <= 'z') {
			return true;
		}
		// ALPHA (UPPER)
		if (c >= 'A' && c <= 'Z') {
			return true;
		}
		// DIGIT
		if (c >= '0' && c <= '9') {
			return true;
		}
		return c == '.' || c == '_' || c == '-' || c == '~';
	}

	/**
	 * Indicates whether the character can be left as it is by the minimal
	 * encoder.
	 * 
	 * <p>
	 * The characters which are not legal are: everything below '&amp;' except
	 * '!', '#' and '$' (so control characters, space, '"' and '%' are not
	 * legal); everything from '{' upwards except '~'; and '`', '&lt;',
	 * '&gt;', '\' and '^'.
	 * 
	 * @param c
	 *            The character to test.
	 * 
	 * @return <code>true</code> if it is legal; <code>false</code> otherwise.
	 */
	private static boolean isLegal(int c) {
		// Filter out [<26]
		if (c < '&' && c != '!' && c != '#' && c != '$') {
			return false;
		}
		// Filter out [>7A]
		if (c >= '{' && c != '~') {
			return false;
		}
		// Handle [26-7A] and '!', '#', '$', '~'
		return !(c == '`' || c == '<' || c == '>' || c == '\\' || c == '^');
	}

	/**
	 * Indicates whether the string contains only ASCII characters.
	 * 
	 * @param s
	 *            The string to test.
	 * 
	 * @return <code>false</code> if any character is greater than or equal to
	 *         0x80; <code>true</code> otherwise.
	 */
	private static boolean isASCII(String s) {
		for (int i = 0; i < s.length(); i++) {
			if (s.charAt(i) >= 0x80) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Indicates whether every escape sequence in the encoded string stands
	 * for an ASCII character.
	 * 
	 * @param s
	 *            The encoded string to test.
	 * 
	 * @return <code>false</code> if any '%' is immediately followed by a
	 *         character greater than '7'; <code>true</code> otherwise.
	 */
	private static boolean isEncodedASCII(String s) {
		for (int i = 0; i < s.length(); i++) {
			if (s.charAt(i) == '%' && i < s.length() - 1 && s.charAt(i + 1) > '7') {
				return false;
			}
		}
		return true;
	}

}
